ohibc logo
OHI British Columbia | OHI Science | Citation policy

knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
                      echo = TRUE, message = FALSE, warning = FALSE)

library(raster)
library(data.table)


source('~/github/src/R/common.R')  ###
  ### includes library(tidyverse); library(stringr); dir_M points to ohi directory

dir_git <- '~/github/spp_health_dists'

dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')

### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data  <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
# library(provRmd); prov_setup()

### support scripts
source('~/github/src/R/rast_tools.R')
  ### raster plotting and analyzing scripts

1 Summary

Create a map of the distribution of biodiversity - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers). These maps will be generated using all taxonomic groups:

  • All species:
    • Number of species (species richness)
    • Range rarity
    • Relative range rarity
  • IUCN-assessed species:
    • Mean risk
    • Variance of risk
    • Number of species

The following maps will also be generated for taxonomic groups (IUCN-assessed species only): * Mean risk * Variance of risk * Number of species

2 Data Sources

3 Methods

3.1 Spatial distribution of all AquaMaps species

3.1.1 Species richness

Species richness as the raw count of species indicated to exist in a particular cell. This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.

We can consider species presence in several ways:

  • all species indicated as present at any level (i.e. any non-zero probability of occurrence)
  • all species indicated as “core habitat” (i.e. 60% or greater probability of occurrence)
  • all species possibly present, with presence weighted by probability of occurrence
spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')

if(!file.exists(spp_richness_file)) {
  am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'),
                        verbose = FALSE)
  
  spp_richness_0pct_probwt <- am_spp_cells %>%
    group_by(loiczid) %>%
    summarize(n_spp_0pct = n(),
              n_spp_probwt = sum(prob))
  
  spp_richness_60pct <- am_spp_cells %>%
    filter(prob >= .60) %>%
    group_by(loiczid) %>%
    summarize(n_spp_60pct = n())
  
  spp_richness <- spp_richness_0pct_probwt %>%
    full_join(spp_richness_60pct, by = 'loiczid')
  
  write_csv(spp_richness, spp_richness_file)

}
spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')
spp_richness <- read_csv(spp_richness_file, col_types = 'dddd') %>%
  mutate(log_n_probwt = log(n_spp_probwt))

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))


rich_rast_0pct   <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_0pct')
rich_rast_60pct  <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_60pct')
rich_rast_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_probwt')
rich_rast_log_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'log_n_probwt')

plot(rich_rast_0pct, main = 'Richness, All species, any presence')

plot(rich_rast_60pct, main = 'Richness, All species, prob of occur >= 60%')

plot(rich_rast_probwt, main = 'Richness, All species, weighted by prob of occur')

plot(rich_rast_log_probwt, main = 'Richness, All species, log(prob of occur weighted))')

3.1.2 Species range rarity

Species range rarity is a measure of endemism that weighs the presence of a species in a particular area relative to its entire range. It is defined in Selig et al 2014 (and referenced from Williams et al. 1996: Promise and problems in applying quantitative complementary areas for representing the diversity of some neotropical plants)

\[Range \hspace{3pt} rarity_{cell} = \sum_{i=1}^N \frac{1}{A_i} \times w\]

Relative range rarity is range rarity divided by species richness to remove confounding effects (again as defined in Selig et al 2014).

As in Selig et al 2014, we will multiply rarity by 100,000 and relative rarity by 1000 for analytical convenience.

This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.

We will calculate range rarity using the same three cuts as for richness; for each cut, species presence, total range, and richness will all be calculated using the same cut method.

spp_rarity_file <- file.path(dir_data, 'am_all_spp_rarity.csv')

if(!file.exists(spp_rarity_file)) {
  if(!exists('am_spp_cells')) {
    am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'),
                          verbose = FALSE)
  }
  
  am_spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv'))
  
  spp_cells_ranges <- am_spp_cells %>%
    left_join(am_spp_ranges, by = 'am_sid')
  
  spp_rarity_0pct_probwt <- spp_cells_ranges %>%
    group_by(loiczid) %>%
    summarize(rarity_0pct   = sum(  1  / area_km2_0pct),
              rarity_probwt = sum(prob / area_km2_probwt))
  
  spp_rarity_60pct <- spp_cells_ranges %>%
    filter(prob >= .60) %>%
    group_by(loiczid) %>%
    summarize(rarity_60pct = sum( 1 / area_km2_60pct, na.rm = TRUE))
  
  spp_rarity <- spp_rarity_0pct_probwt %>%
    full_join(spp_rarity_60pct, by = 'loiczid') %>%
    mutate(rarity_0pct   = rarity_0pct * 100000,
           rarity_60pct  = rarity_60pct * 100000,
           rarity_probwt = rarity_probwt * 100000)
  
  write_csv(spp_rarity, spp_rarity_file)

}
spp_rarity   <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
                         col_types = 'dddd')
spp_richness <- read_csv(file.path(dir_data, 'am_all_spp_richness.csv'),
                         col_types = 'dddd')

spp_rel_rare <- spp_rarity %>%
  left_join(spp_richness, by = 'loiczid') %>%
  mutate(rel_rare_0pct   = rarity_0pct / n_spp_0pct * 1000,
         rel_rare_60pct  = rarity_60pct / n_spp_60pct * 1000,
         rel_rare_probwt = rarity_probwt / n_spp_probwt * 1000) %>%
  select(loiczid, contains('rel_rare'))

write_csv(spp_rel_rare, file.path(dir_data, 'am_all_spp_rel_rarity.csv'))
spp_rarity   <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
                         col_types = 'dddd')
spp_rel_rare <- read_csv(file.path(dir_data, 'am_all_spp_rel_rarity.csv'),
                         col_types = 'dddd')

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))


rare_rast_0pct   <- subs(loiczid_rast, spp_rarity, 
                         by = 'loiczid', which = 'rarity_0pct')
rare_rast_60pct  <- subs(loiczid_rast, spp_rarity, 
                         by = 'loiczid', which = 'rarity_60pct')
rare_rast_probwt <- subs(loiczid_rast, spp_rarity, 
                         by = 'loiczid', which = 'rarity_probwt')

plot(rare_rast_0pct, main = 'Range rarity, All species, any presence')

plot(rare_rast_60pct, main = 'Range rarity, All species, prob of occur >= 60%')

plot(rare_rast_probwt, main = 'Range rarity, All species, weighted by prob of occur')

### relative range rarity
rel_rare_rast_0pct   <- subs(loiczid_rast, spp_rel_rare, 
                         by = 'loiczid', which = 'rel_rare_0pct')
rel_rare_rast_60pct  <- subs(loiczid_rast, spp_rel_rare, 
                         by = 'loiczid', which = 'rel_rare_60pct')
rel_rare_rast_probwt <- subs(loiczid_rast, spp_rel_rare, 
                         by = 'loiczid', which = 'rel_rare_probwt')

plot(rel_rare_rast_0pct, main = 'Rel range rarity, All spp, any presence')

plot(rel_rare_rast_60pct, main = 'Rel range rarity, All spp, prob of occur >= 60%')

plot(rel_rare_rast_probwt, main = 'Rel range rarity, All spp, weighted by prob of occur')

3.2 Spatial distribution of current extinction risk

3.2.1 Aggregate mean risk and variance by cell

As in the species richness and rarity calculations, we will calculate mean risk using three different considerations of species presence: all presence (non-zero probability of occurrence), “core habitat” (60% or greater prob of occurrence), and probability-weighted.

This binds rows into a long dataframe, but the saved file is rather large. By rounding the decimals a bit, we can shave off quite a bit of file size.

iucn_version <- '2017-3'

cell_risk_file <- file.path(dir_data, sprintf('risk_by_cell_all_%s.csv', iucn_version))


iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int', 
                                        'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_current_risk <- read_csv(file.path(dir_data, 
                                       sprintf('iucn_risk_current_%s.csv', iucn_version)),
                           col_types = cols_only(iucn_sid = 'i', cat = 'c', cat_score = 'd')) %>%
  select(iucn_sid, cat, cat_score) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  as.data.table()

spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
                   verbose = FALSE)
## 
Read 42.8% of 16116125 rows
Read 62.5% of 16116125 rows
Read 83.6% of 16116125 rows
Read 16116125 rows and 3 (of 3) columns from 0.338 GB file in 00:00:05
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid'] %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid))

spp_cells_risk_sum_0pct <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  # filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = n(), 
            mean_risk = mean(cat_score),
            var_risk  = var(cat_score),
            presence  = '0%')

spp_cells_risk_sum_60pct <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = n(), 
            mean_risk = mean(cat_score),
            var_risk  = var(cat_score),
            presence  = '60%')

### For the prob weighted calculations, weight the mean and var:
###     mean = 1/n * sum(y)
### ==> mean = 1/sum(prob) * sum(y * prob)
###      var = 1/n * sum[y - E(y)]^2
### ==>  var = 1/sum(prob) * sum([y - E(y)]^2 * prob)
spp_cells_risk_sum_probwt <- spp_cells_risk %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
  # filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = sum(prob),
            mean_risk = 1/n_spp * sum(cat_score * prob),
            var_risk  = 1/n_spp * sum((cat_score - mean_risk)^2 * prob),
            presence  = 'prob')

spp_cells_risk_sum <- bind_rows(spp_cells_risk_sum_0pct, 
                                spp_cells_risk_sum_60pct, 
                                spp_cells_risk_sum_probwt) %>%
  mutate(var_risk = ifelse(mean_risk == 0 & is.na(var_risk), 0, var_risk),
         mean_risk = round(mean_risk, 6),
         var_risk  = round(var_risk, 6))
    ### if mean risk == 0, all risks are zero, so zero variance.

write_csv(spp_cells_risk_sum, cell_risk_file)

3.2.2 Mean risks and variance maps

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))

spp_cells_risk_sum <- read_csv(cell_risk_file,
                         col_types = 'idddc')

cuts <- c('0%', '60%', 'prob')

for(cut in cuts) { ### cut <- cuts[3]
  df_cut <- spp_cells_risk_sum %>%
    filter(presence == cut)
  
  cat(sprintf('#### Mean, variance, and richness based on %s', cut))
  
  risk_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'mean_risk')
  var_rast  <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'var_risk')
  nspp_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_spp')
  
  plot(risk_rast, main = sprintf('Mean risk, cut = %s', cut))
  plot(var_rast,  main = sprintf('Variance of risk, cut = %s', cut))
  plot(nspp_rast, main = sprintf('N spp (risk assessed), cut = %s', cut))

}

3.2.2.1 Mean, variance, and richness based on 0%#### Mean, variance, and richness based on 60%#### Mean, variance, and richness based on prob

3.3 Spatial distribution of trend in extinction risk

3.3.1 Aggregate mean trend by cell

Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.

cell_trend_file <- file.path(dir_data, sprintf('iucn_trend_by_cell_%s.csv', iucn_version))

iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_trend <- read_csv(file.path(dir_data, 'trend_by_spp.csv')) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  as.data.table()

if(!exists('spp_cells')) {
  spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
                     verbose = FALSE)
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']

spp_cells_trend_sum_0pct <- spp_cells_trend %>%
  filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
  # filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = n(), 
            mean_trend = mean(trend_score),
            var_trend  = var(trend_score),
            presence  = '0%')

spp_cells_trend_sum_60pct <- spp_cells_trend %>%
  filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = n(), 
            mean_trend = mean(trend_score),
            var_trend  = var(trend_score),
            presence  = '60%')

spp_cells_trend_sum_probwt <- spp_cells_trend %>%
  filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
  # filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_spp     = sum(prob),
            mean_trend = 1/n_spp * sum(trend_score * prob),
            var_trend  = 1/n_spp * sum((trend_score - mean_trend)^2 * prob),
            presence  = 'prob')
 
spp_cells_trend_sum <- bind_rows(spp_cells_trend_sum_0pct, 
                                 spp_cells_trend_sum_60pct,
                                 spp_cells_trend_sum_probwt) %>%
    mutate(var_trend  = ifelse(mean_trend == 0 & is.na(var_trend), 0, var_trend),
           mean_trend = round(mean_trend, 6),
           var_trend  = round(var_trend, 6))

write_csv(spp_cells_trend_sum, cell_trend_file)

3.4 Spatial distribution of threatened species

3.4.1 Calc threatened species by cell

Threatened species are those listed by the IUCN as Vulnerable, Endangered, or Critically Endangered. We can count threatened species the same way we determined species richness, filtering on the category (or the category score) to exclude LC, NT, and EX.

iucn_version <- '2017-3'

cell_threatened_file <- file.path(dir_data, 
                                  sprintf('threatened_spp_by_cell_all_%s.csv', iucn_version))


iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int', 
                                        'am_iucn_crosslisted_spp.csv')) %>%
  select(am_sid = speciesid, iucn_sid = iucn_id) %>%
  distinct()

spp_threatened <- read_csv(file.path(dir_data, 
                                       sprintf('iucn_risk_current_%s.csv', iucn_version)),
                           col_types = cols_only(iucn_sid = 'i', cat = 'c', cat_score = 'd')) %>%
  filter(cat %in% c('VU', 'EN', 'CR')) %>%
  left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
  as.data.table()

spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
                   verbose = FALSE)
## 
Read 13.1% of 16116125 rows
Read 32.9% of 16116125 rows
Read 53.0% of 16116125 rows
Read 73.6% of 16116125 rows
Read 93.8% of 16116125 rows
Read 16116125 rows and 3 (of 3) columns from 0.338 GB file in 00:00:07
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')

### using data.table join:
spp_cells_threatened <- spp_threatened[spp_cells, on = 'am_sid'] %>%
  filter(!is.na(cat_score) & !is.na(iucn_sid))

spp_cells_threat_sum_0pct <- spp_cells_threatened %>%
  group_by(loiczid) %>%
  summarize(n_threatened = n(), 
            presence     = '0%')

spp_cells_threat_sum_60pct <- spp_cells_threatened %>%
  filter(prob >= 0.60) %>%
  group_by(loiczid) %>%
  summarize(n_threatened = n(), 
            presence     = '60%')

spp_cells_threat_sum_probwt <- spp_cells_threatened %>%
  group_by(loiczid) %>%
  summarize(n_threatened = sum(prob),
            presence     = 'prob')

spp_cells_threat_sum <- bind_rows(spp_cells_threat_sum_0pct, 
                                  spp_cells_threat_sum_60pct, 
                                  spp_cells_threat_sum_probwt)

write_csv(spp_cells_threat_sum, cell_threatened_file)

3.4.2 Map of threatened spp counts

loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))

spp_cells_threat_sum <- read_csv(cell_threatened_file,
                               col_types = 'idc')

cuts <- c('0%', '60%', 'prob')

for(cut in cuts) { ### cut <- cuts[3]
  df_cut <- spp_cells_threat_sum %>%
    filter(presence == cut)
  
  cat(sprintf('#### Number of threatened species based on %s', cut))
  
  threatened_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_threatened')
  
  plot(threatened_rast, main = sprintf('N of threatened spp, cut = %s', cut))

}

3.4.2.1 Number of threatened species based on 0%#### Number of threatened species based on 60%#### Number of threatened species based on prob